*This file can be used to clean the GSS data and to merge it to the data files and variables used in the paper "Macroeconomic Conditions When Young Shape Job Preferences for Life"

* This file should be run after macro_cleaning.do

* Datasets used in this file and their sources:

*1. The General Social Survey is freely available to download from: https://gss.norc.org/

*2. National unemployment rates are available on a yearly basis from the Bureau of Labor Statistics, from 1929 (NationalUnemployment.dta).

*3. Regional income levels are available on a yearly basis from the Bureau of Economic Analysis, from 1929. (RegionalIncome.dta)

*4. RegionState.dta has information on the correspondence between US states and GSS regions, as well as data on state-level population.

*5. Data on the US population each year since 1929 is provided in the file population.dta

*6. cpi.dta is used to adjust for inflation, with base year 1982-1984 as defined by the US department of labor; to adjust to 2017 US dollars, we use the 2017 CPI of 245.1



* ---- Setup: start from an empty session and load the raw GSS file ----
clear all
set maxvar 10000


* Working directory: left blank in the replication file; fill in the folder
* that holds the data files before running.
cd ""

use "Gss.dta"

* Keep only the variables used below.
* Every name is written out in full (the job-promotion item is jobpromo) so the
* command does not depend on variable abbreviation being enabled, and each
* variable is listed once.
keep parborn year id wtssall sex age educ marital wrkstat wrkslf indus10 occ10 ///
    spwrksta childs paeduc incom16 maeduc speduc degree padeg madeg spdeg race ///
    born hompop realinc realrinc income rincome region ///
    jobinc jobsec jobhour jobpromo jobmeans ///
    wrkgovt wtss wtssnr vstrat vpsu reg16

* Pairwise correlations of the five job-preference items, before recoding
corr jobinc jobsec jobhour jobpromo jobmeans

* Reverse the 1-5 coding of the five job-preference items so that a higher
* value means the attribute is ranked as more important (3 stays 3).

local jobprefs jobmeans jobinc jobsec jobhour jobpromo

recode `jobprefs' (1=5) (2=4) (4=2) (5=1)


* Value labels for the reversed scale, attached to all five items
label define ranking 1 "fifth" 2 "fourth" 3 "third" 4 "second" 5 "most imp"

label values `jobprefs' ranking


* generate constructed variables

* Age bands: 1 = 18-29, 2 = 30-39, 3 = 40-49, 4 = 50-59, 5 = 60 and older.
* Stata treats missing values as larger than any number, so the open-ended
* top band must exclude missing ages explicitly; otherwise respondents with
* unknown age would be assigned to the oldest cohort.
gen Agecohort=.
replace Agecohort=1 if age>=18 & age<30
replace Agecohort=2 if age>=30 & age<40
replace Agecohort=3 if age>=40 & age<50
replace Agecohort=4 if age>=50 & age<60
replace Agecohort=5 if age>=60 & !missing(age)

* Birth year implied by survey year and age
gen birth=year-age

* employed = 1 when wrkstat is 1 or 2, 0 otherwise.
* NOTE(review): respondents with missing wrkstat are coded employed=0 and
* workforce=1 by the two constructions below - confirm this is intended.
gen employed=0
replace employed=1 if wrkstat==1 | wrkstat==2

* workforce = 0 when wrkstat is 5, 7 or 8, 1 otherwise
gen workforce=1
replace workforce=0 if (wrkstat==5 | wrkstat==7 | wrkstat==8)

* Indicator (dummy) variables for region, work status, marital status and
* survey year; tabulate prints each frequency table as a side effect.
tabulate region, generate(region_dummies)
tabulate wrkstat, generate(workstatus_dummies)
tabulate marital, generate(marital_dummies)
tabulate year, generate(year_dummies)

* Covariates shared by all three regression-based imputations below
local imp_covars educ ///
    workstatus_dummies1 workstatus_dummies2 workstatus_dummies3 workstatus_dummies4 ///
    workstatus_dummies5 workstatus_dummies6 workstatus_dummies7 workstatus_dummies8 ///
    age ///
    region_dummies1 region_dummies2 region_dummies3 region_dummies4 region_dummies5 ///
    region_dummies6 region_dummies7 region_dummies8 region_dummies9 ///
    hompop sex year ///
    marital_dummies1 marital_dummies2 marital_dummies3 marital_dummies4 marital_dummies5

* Fill in missing family income (realinc) and respondent income (realrinc)
impute realinc `imp_covars', generate(income_imputed)
impute realrinc `imp_covars', generate(income_imputedR)

* Log of the imputed incomes
gen lnincome=log(income_imputed)
gen lnincomeR=log(income_imputedR)

* Flags for observations whose income was originally missing (. or .i)
gen income_na=(realinc==. | realinc==.i)

gen income_naR=(realrinc==. | realrinc==.i)

* Parental education: replace the missing codes ., .d, .i, .n with the sample
* mean and record which observations were filled in
foreach parent in paeduc maeduc {
    gen `parent'_imp=`parent'
    egen m_`parent'=mean(`parent')
    replace `parent'_imp=m_`parent' if `parent'==.d | `parent'==.i | `parent'==. | `parent'==.n

    gen `parent'_m=(`parent'==. | `parent'==.i | `parent'==.d | `parent'==.n)
}

* Family income at age 16: same mean fill, for the missing codes ., .a, .i
gen incom16_imp=incom16
egen m_incom16=mean(incom16)
replace incom16_imp=m_incom16 if incom16==.a | incom16==.i | incom16==.

gen incom16_m=(incom16==. | incom16==.i | incom16==.a)

* Self-employment status: regression imputation plus missing flag (. or .i)
impute wrkslf `imp_covars', generate(wrkslf_imputed)

gen wrkslf_na=(wrkslf==. | wrkslf==.i)

* Square root of household size
gen sqrt_hshsize= sqrt(hompop)

* Save the cleaned dataset, restricted to a smaller number of variables
save "Gss_restricted.dta", replace


///////// Unemployment Rate and Income Levels ////////

/// I. National Unemployment/// 

* Attach the national unemployment series to every GSS respondent by survey
* year and store the result as Gss_clean.dta.
clear all
use "Gss_restricted.dta"

* Many respondents share a survey year, so this is a many-to-one merge.
* The m:1 form stops with an error if the using file has more than one row
* per year, instead of silently pairing rows as the old-style merge syntax
* does, and it does not need either file to be pre-sorted.
* Assumes NationalUnemployment.dta holds one row per year - TODO confirm.
* The match indicator (1 = master only, 2 = using only, 3 = matched) is
* stored in mergeunemployment.
merge m:1 year using "NationalUnemployment.dta", generate(mergeunemployment)


save "Gss_clean.dta", replace


//// II. National and Regional Income levels ////


* Build population-weighted income per capita by region-year and by year,
* convert both to 2017 dollars, and save them as FinalIncome.
clear all
use "RegionalIncome"

* Bring in the CPI and population files by year.
* NOTE(review): these use the old-style merge syntax, which needs the master
* sorted on the key (and presumably the using files too) - confirm.
sort year
merge year using "cpi"
drop _merge
sort year
merge year using "population"
drop _merge


* Total income per row = income per capita times population
sort region year
gen inc1=income_capita*population

* Region-year totals and the implied regional income per capita
egen population_sum=total(population), by(region year)
egen inc_sum=total(inc1), by(region year)

gen income_capita_region=inc_sum/population_sum


* Year totals over all rows and the implied national income per capita
sort year
egen population_sum_us=total(population), by(year)
egen inc_sum_us=total(inc1), by(year)

gen income_capita_us=inc_sum_us/population_sum_us

* One row per year-region (group means)
collapse (mean) income_capita_region income_capita_us cpi population_sum popUS, by(year region)

* Deflate to 2017 dollars using the 2017 CPI of 245.1
gen inccap_R=(income_capita_region*245.1)/cpi
gen inccap_US=(income_capita_us*245.1)/cpi

gen loginccap_R=ln(inccap_R)

save "FinalIncome", replace

////////////////////Experience during impressionable years/////////

// 1. Income levels //

* For each GSS survey wave listed below, build a file with one row per
* (region, calendar year, respondent age 18-75 at the survey), restricted to
* calendar years in which a respondent of that age was already born.
* The same steps run for every wave, so they are written once as a loop
* instead of one copy per wave.
*
* Output: income_<wave>_1825.dta, containing
*   age             age at the time of the survey (18-75)
*   birth           implied birth year (wave - age)
*   exposure_cutoff constant 1 (marks the rows that were kept)
*   age_year        age of the respondent in calendar year year2
*   year2           calendar year of the income observation
*   year            GSS survey wave

local gss_waves 2016 2014 2012 2010 2008 2006 2004 2002 2000 1998 1996 1994 ///
    1993 1991 1990 1989 1988 1987 1986 1985 1984 1983 1982 1980 1978 1977 ///
    1976 1975 1974 1973

foreach wave of local gss_waves {

    use "FinalIncome", clear

    * 58 copies of every region-year row, one for each age 18-75
    expand 58

    sort region year

    * age at the survey for each copy
    by region year: gen age = 17 + _n
    * birth year given that age at the time of the survey
    by region year: gen birth = `wave' - 17 - _n

    * keep income years up to the survey year ...
    keep if year <= `wave'

    * ... and only those in which the respondent was already born
    keep if year >= birth
    gen exposure_cutoff = 1

    * age of the respondent in each income year
    gen age_year = year - birth
    rename year year2
    gen year = `wave'

    save "income_`wave'_1825.dta", replace
}

* Stack the per-wave income files into one dataset, starting from the 2016
* file and appending the remaining waves from most recent to oldest.
clear all

local earlier_waves 2014 2012 2010 2008 2006 2004 2002 2000 1998 1996 1994 ///
    1993 1991 1990 1989 1988 1987 1986 1985 1984 1983 1982 1980 1978 1977 ///
    1976 1975 1974 1973

use "income_2016_1825.dta"
foreach wave of local earlier_waves {
    append using "income_`wave'_1825.dta"
}

save "income_all_1825.dta", replace

* Summarise income experienced between ages 18 and 25 for every combination
* of survey wave, age at survey and region.
clear all
use "income_all_1825.dta"

sort year age region age_year

* restrict to the calendar years in which the respondent was aged 18-25
keep if inrange(age_year, 18, 25)


* experience during the impressionable years is the simple (unweighted) mean
* of the yearly values between ages 18 and 25
egen w_income_1825=mean(inccap_R), by(age year region)
egen w_income_1825US=mean(inccap_US), by(age year region)


* log of the standard deviation of regional income over the same years
egen stddev=sd(inccap_R), by(age year region)
gen sd_income_1825=ln(stddev)

* the region variable is renamed reg16 here
rename region reg16

* one row per age-wave-region (group means)
collapse (mean) w_income_1825 w_income_1825US sd_income_1825 popUS, by(age year reg16)

* log of the mean regional and national income experienced at ages 18-25
gen income_exp_1825=ln(w_income_1825)
gen income_exp_1825US=ln(w_income_1825US)

save "income_final_1825.dta", replace

// 2. Unemployment levels //

///////////////////2016//////////////////////////////////
clear all
use "unemployment29.dta"
expand 58
sort countryname
by countryname: gen birth=_n
replace birth=birth+1940
tab birth

gen age=2016-birth


reshape long yr , i(country age) j(year)

rename yr unemp

keep if year<=2016
gen exposure_cutoff=year-birth
replace exposure_cutoff=. if exposure<0
replace exposure_cutoff=1 if exposure_cutoff!=.

keep if exposure_cutoff==1

gen age_year=year-birth

replace countryname="USA" if countryname=="United States"
rename year year2
gen year=2016

keep countryname age year year2 unemp age_year

save "18-25unemployment_usa2016.dta", replace

///////////////////2014//////////////////////////////////
clear all
use "unemployment29.dta"
expand 58
sort countryname
by countryname: gen birth=_n
replace birth=birth+1938
tab birth

gen age=2014-birth


reshape long yr , i(country age) j(year)

rename yr unemp

keep if year<=2014
gen exposure_cutoff=year-birth
replace exposure_cutoff=. if exposure<0
replace exposure_cutoff=1 if exposure_cutoff!=.

keep if exposure_cutoff==1

gen age_year=year-birth

replace countryname="USA" if countryname=="United States"
rename year year2
gen year=2014

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa2014.dta", replace

///////////////////2012//////////////////////////////////
clear all
use "unemployment29.dta"
expand 58
sort countryname
by countryname: gen birth=_n
replace birth=birth+1936
tab birth

gen age=2012-birth


reshape long yr , i(country age) j(year)

rename yr unemp

keep if year<=2012
gen exposure_cutoff=year-birth
replace exposure_cutoff=. if exposure<0
replace exposure_cutoff=1 if exposure_cutoff!=.

keep if exposure_cutoff==1

gen age_year=year-birth

replace countryname="USA" if countryname=="United States"
rename year year2
gen year=2012

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa2012.dta", replace

///////////////////2010//////////////////////////////////
clear all
use "unemployment29.dta"
expand 58
sort countryname
by countryname: gen birth=_n
replace birth=birth+1934
tab birth

gen age=2010-birth


reshape long yr , i(country age) j(year)

rename yr unemp

keep if year<=2010
gen exposure_cutoff=year-birth
replace exposure_cutoff=. if exposure<0
replace exposure_cutoff=1 if exposure_cutoff!=.

keep if exposure_cutoff==1

gen age_year=year-birth

replace countryname="USA" if countryname=="United States"
rename year year2
gen year=2010

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa2010.dta", replace

///////////////////2008//////////////////////////////////
clear all
use "unemployment29.dta"
expand 58
sort countryname
by countryname: gen birth=_n
replace birth=birth+1932
tab birth

gen age=2008-birth


reshape long yr , i(country age) j(year)

rename yr unemp

keep if year<=2008
gen exposure_cutoff=year-birth
replace exposure_cutoff=. if exposure<0
replace exposure_cutoff=1 if exposure_cutoff!=.

keep if exposure_cutoff==1

gen age_year=year-birth

replace countryname="USA" if countryname=="United States"
rename year year2
gen year=2008

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa2008.dta", replace

///////////////////2006//////////////////////////////////
clear all
use "unemployment29.dta"
expand 58
sort countryname
by countryname: gen birth=_n
replace birth=birth+1930
tab birth

gen age=2006-birth


reshape long yr , i(country age) j(year)

rename yr unemp

keep if year<=2006
gen exposure_cutoff=year-birth
replace exposure_cutoff=. if exposure<0
replace exposure_cutoff=1 if exposure_cutoff!=.

keep if exposure_cutoff==1

gen age_year=year-birth

replace countryname="USA" if countryname=="United States"
rename year year2
gen year=2006

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa2006.dta", replace

///////////////////2004//////////////////////////////////
clear all
use "unemployment29.dta"
expand 58
sort countryname
by countryname: gen birth=_n
replace birth=birth+1928
tab birth

gen age=2004-birth


reshape long yr , i(country age) j(year)

rename yr unemp

keep if year<=2004
gen exposure_cutoff=year-birth
replace exposure_cutoff=. if exposure<0
replace exposure_cutoff=1 if exposure_cutoff!=.

keep if exposure_cutoff==1

gen age_year=year-birth

replace countryname="USA" if countryname=="United States"
rename year year2
gen year=2004

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa2004.dta", replace

///////////////////2000//////////////////////////////////
clear all
use "unemployment29.dta"
expand 58
sort countryname
by countryname: gen birth=_n
replace birth=birth+1924
tab birth

gen age=2000-birth


reshape long yr , i(country age) j(year)

rename yr unemp

keep if year<=2000
gen exposure_cutoff=year-birth
replace exposure_cutoff=. if exposure<0
replace exposure_cutoff=1 if exposure_cutoff!=.

keep if exposure_cutoff==1

gen age_year=year-birth

replace countryname="USA" if countryname=="United States"
rename year year2
gen year=2000

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa2000.dta", replace

///////////////////1996//////////////////////////////////

clear all
use "unemployment29.dta"
expand 58
sort countryname
by countryname: gen birth=_n
replace birth=birth+1920
tab birth

gen age=1996-birth


reshape long yr , i(country age) j(year)

rename yr unemp

keep if year<=1996
gen exposure_cutoff=year-birth
replace exposure_cutoff=. if exposure<0
replace exposure_cutoff=1 if exposure_cutoff!=.

keep if exposure_cutoff==1

gen age_year=year-birth

replace countryname="USA" if countryname=="United States"
rename year year2
gen year=1996

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa1996.dta", replace


///////////////////1994//////////////////////////////////

clear all
use "unemployment29.dta"
expand 58
sort countryname
by countryname: gen birth=_n
replace birth=birth+1918
tab birth

gen age=1994-birth


reshape long yr , i(country age) j(year)

rename yr unemp

keep if year<=1994
gen exposure_cutoff=year-birth
replace exposure_cutoff=. if exposure<0
replace exposure_cutoff=1 if exposure_cutoff!=.

keep if exposure_cutoff==1

gen age_year=year-birth

replace countryname="USA" if countryname=="United States"
rename year year2
gen year=1994

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa1994.dta", replace

///////////////////1993//////////////////////////////////

clear all
use "unemployment29.dta"
expand 58
sort countryname
by countryname: gen birth=_n
replace birth=birth+1917
tab birth

gen age=1993-birth


reshape long yr , i(country age) j(year)

rename yr unemp

keep if year<=1993
gen exposure_cutoff=year-birth
replace exposure_cutoff=. if exposure<0
replace exposure_cutoff=1 if exposure_cutoff!=.

keep if exposure_cutoff==1
gen age_year=year-birth

replace countryname="USA" if countryname=="United States"
rename year year2
gen year=1993

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa1993.dta", replace

///////////////////1991//////////////////////////////////

clear all
use "unemployment29.dta"
expand 58
sort countryname
by countryname: gen birth=_n
replace birth=birth+1915
tab birth

gen age=1991-birth


reshape long yr , i(country age) j(year)

rename yr unemp

keep if year<=1991
gen exposure_cutoff=year-birth
replace exposure_cutoff=. if exposure<0
replace exposure_cutoff=1 if exposure_cutoff!=.

keep if exposure_cutoff==1
gen age_year=year-birth

replace countryname="USA" if countryname=="United States"
rename year year2
gen year=1991

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa1991.dta", replace

///////////////////1990//////////////////////////////////

clear all
use "unemployment29.dta"
expand 58
sort countryname
by countryname: gen birth=_n
replace birth=birth+1914
tab birth

gen age=1990-birth


reshape long yr , i(country age) j(year)

rename yr unemp

keep if year<=1990
gen exposure_cutoff=year-birth
replace exposure_cutoff=. if exposure<0
replace exposure_cutoff=1 if exposure_cutoff!=.

keep if exposure_cutoff==1

gen age_year=year-birth

replace countryname="USA" if countryname=="United States"
rename year year2
gen year=1990

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa1990.dta", replace

///////////////////1989//////////////////////////////////

clear all
use "unemployment29.dta"
expand 58
sort countryname
by countryname: gen birth=_n
replace birth=birth+1913
tab birth

gen age=1989-birth


reshape long yr , i(country age) j(year)

rename yr unemp

keep if year<=1989
gen exposure_cutoff=year-birth
replace exposure_cutoff=. if exposure<0
replace exposure_cutoff=1 if exposure_cutoff!=.

keep if exposure_cutoff==1

gen age_year=year-birth

replace countryname="USA" if countryname=="United States"
rename year year2
gen year=1989

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa1989.dta", replace

///////////////////1988//////////////////////////////////

clear all
use "unemployment29.dta"
expand 58
sort countryname
by countryname: gen birth=_n
replace birth=birth+1912
tab birth

gen age=1988-birth


reshape long yr , i(country age) j(year)

rename yr unemp

keep if year<=1988
gen exposure_cutoff=year-birth
replace exposure_cutoff=. if exposure<0
replace exposure_cutoff=1 if exposure_cutoff!=.

keep if exposure_cutoff==1

gen age_year=year-birth

replace countryname="USA" if countryname=="United States"
rename year year2
gen year=1988

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa1988.dta", replace

///////////////////1987//////////////////////////////////
* Builds "18-25unemployment_usa1987.dta" for survey year 1987: one row per
* country, birth cohort and calendar year lived so far, holding that year's
* unemployment figure (unemp) and the cohort's age in that year (age_year).

clear all
use "unemployment29.dta"

* One copy of each country row per birth cohort: 58 cohorts,
* birth = 1912..1969, which is age 18..75 in 1987.
expand 58
bysort countryname: gen birth = 1911 + _n
tab birth

gen age = 1987 - birth

* Wide (one yr column per calendar year) to long. Variable names are written
* out in full so nothing depends on Stata's variable-abbreviation setting.
reshape long yr, i(countryname age) j(year)
rename yr unemp

* Keep calendar years from the cohort's birth year through the survey year.
gen age_year = year - birth
keep if year <= 1987 & age_year >= 0 & !missing(age_year)

replace countryname = "USA" if countryname == "United States"

* year2 = calendar year of the unemployment figure, year = survey year
rename year year2
gen year = 1987

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa1987.dta", replace



///////////////////1985//////////////////////////////////
* Builds "18-25unemployment_usa1985.dta" for survey year 1985: one row per
* country, birth cohort and calendar year lived so far, holding that year's
* unemployment figure (unemp) and the cohort's age in that year (age_year).

clear all
use "unemployment29.dta"

* One copy of each country row per birth cohort: 58 cohorts,
* birth = 1910..1967, which is age 18..75 in 1985.
expand 58
bysort countryname: gen birth = 1909 + _n
tab birth

gen age = 1985 - birth

* Wide (one yr column per calendar year) to long. Variable names are written
* out in full so nothing depends on Stata's variable-abbreviation setting.
reshape long yr, i(countryname age) j(year)
rename yr unemp

* Keep calendar years from the cohort's birth year through the survey year.
gen age_year = year - birth
keep if year <= 1985 & age_year >= 0 & !missing(age_year)

replace countryname = "USA" if countryname == "United States"

* year2 = calendar year of the unemployment figure, year = survey year
rename year year2
gen year = 1985

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa1985.dta", replace

///////////////////1984//////////////////////////////////
* Builds "18-25unemployment_usa1984.dta" for survey year 1984: one row per
* country, birth cohort and calendar year lived so far, holding that year's
* unemployment figure (unemp) and the cohort's age in that year (age_year).

clear all
use "unemployment29.dta"

* One copy of each country row per birth cohort: 58 cohorts,
* birth = 1909..1966, which is age 18..75 in 1984.
expand 58
bysort countryname: gen birth = 1908 + _n
tab birth

gen age = 1984 - birth

* Wide (one yr column per calendar year) to long. Variable names are written
* out in full so nothing depends on Stata's variable-abbreviation setting.
reshape long yr, i(countryname age) j(year)
rename yr unemp

* Keep calendar years from the cohort's birth year through the survey year.
gen age_year = year - birth
keep if year <= 1984 & age_year >= 0 & !missing(age_year)

replace countryname = "USA" if countryname == "United States"

* year2 = calendar year of the unemployment figure, year = survey year
rename year year2
gen year = 1984

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa1984.dta", replace

///////////////////1982//////////////////////////////////
* Builds "18-25unemployment_usa1982.dta" for survey year 1982: one row per
* country, birth cohort and calendar year lived so far, holding that year's
* unemployment figure (unemp) and the cohort's age in that year (age_year).

clear all
use "unemployment29.dta"

* One copy of each country row per birth cohort: 58 cohorts,
* birth = 1907..1964, which is age 18..75 in 1982.
expand 58
bysort countryname: gen birth = 1906 + _n
tab birth

gen age = 1982 - birth

* Wide (one yr column per calendar year) to long. Variable names are written
* out in full so nothing depends on Stata's variable-abbreviation setting.
reshape long yr, i(countryname age) j(year)
rename yr unemp

* Keep calendar years from the cohort's birth year through the survey year.
gen age_year = year - birth
keep if year <= 1982 & age_year >= 0 & !missing(age_year)

replace countryname = "USA" if countryname == "United States"

* year2 = calendar year of the unemployment figure, year = survey year
rename year year2
gen year = 1982

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa1982.dta", replace


///////////////////1980//////////////////////////////////
* Builds "18-25unemployment_usa1980.dta" for survey year 1980: one row per
* country, birth cohort and calendar year lived so far, holding that year's
* unemployment figure (unemp) and the cohort's age in that year (age_year).

clear all
use "unemployment29.dta"

* One copy of each country row per birth cohort: 58 cohorts,
* birth = 1905..1962, which is age 18..75 in 1980.
expand 58
bysort countryname: gen birth = 1904 + _n
tab birth

gen age = 1980 - birth

* Wide (one yr column per calendar year) to long. Variable names are written
* out in full so nothing depends on Stata's variable-abbreviation setting.
reshape long yr, i(countryname age) j(year)
rename yr unemp

* Keep calendar years from the cohort's birth year through the survey year.
gen age_year = year - birth
keep if year <= 1980 & age_year >= 0 & !missing(age_year)

replace countryname = "USA" if countryname == "United States"

* year2 = calendar year of the unemployment figure, year = survey year
rename year year2
gen year = 1980

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa1980.dta", replace

///////////////////1977//////////////////////////////////
* Builds "18-25unemployment_usa1977.dta" for survey year 1977: one row per
* country, birth cohort and calendar year lived so far, holding that year's
* unemployment figure (unemp) and the cohort's age in that year (age_year).

clear all
use "unemployment29.dta"

* One copy of each country row per birth cohort: 58 cohorts,
* birth = 1902..1959, which is age 18..75 in 1977.
expand 58
bysort countryname: gen birth = 1901 + _n
tab birth

gen age = 1977 - birth

* Wide (one yr column per calendar year) to long. Variable names are written
* out in full so nothing depends on Stata's variable-abbreviation setting.
reshape long yr, i(countryname age) j(year)
rename yr unemp

* Keep calendar years from the cohort's birth year through the survey year.
gen age_year = year - birth
keep if year <= 1977 & age_year >= 0 & !missing(age_year)

replace countryname = "USA" if countryname == "United States"

* year2 = calendar year of the unemployment figure, year = survey year
rename year year2
gen year = 1977

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa1977.dta", replace

///////////////////1976//////////////////////////////////
* Builds "18-25unemployment_usa1976.dta" for survey year 1976: one row per
* country, birth cohort and calendar year lived so far, holding that year's
* unemployment figure (unemp) and the cohort's age in that year (age_year).

clear all
use "unemployment29.dta"

* One copy of each country row per birth cohort: 58 cohorts,
* birth = 1901..1958, which is age 18..75 in 1976.
expand 58
bysort countryname: gen birth = 1900 + _n
tab birth

gen age = 1976 - birth

* Wide (one yr column per calendar year) to long. Variable names are written
* out in full so nothing depends on Stata's variable-abbreviation setting.
reshape long yr, i(countryname age) j(year)
rename yr unemp

* Keep calendar years from the cohort's birth year through the survey year.
gen age_year = year - birth
keep if year <= 1976 & age_year >= 0 & !missing(age_year)

replace countryname = "USA" if countryname == "United States"

* year2 = calendar year of the unemployment figure, year = survey year
rename year year2
gen year = 1976

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa1976.dta", replace

/////////////1974////////////////////////
* Builds "18-25unemployment_usa1974.dta" for survey year 1974: one row per
* country, birth cohort and calendar year lived so far, holding that year's
* unemployment figure (unemp) and the cohort's age in that year (age_year).

clear all
use "unemployment29.dta"

* One copy of each country row per birth cohort: 58 cohorts,
* birth = 1899..1956, which is age 18..75 in 1974.
expand 58
bysort countryname: gen birth = 1898 + _n
tab birth

gen age = 1974 - birth

* Wide (one yr column per calendar year) to long. Variable names are written
* out in full so nothing depends on Stata's variable-abbreviation setting.
reshape long yr, i(countryname age) j(year)
rename yr unemp

* Keep calendar years from the cohort's birth year through the survey year.
gen age_year = year - birth
keep if year <= 1974 & age_year >= 0 & !missing(age_year)

replace countryname = "USA" if countryname == "United States"

* year2 = calendar year of the unemployment figure, year = survey year
rename year year2
gen year = 1974

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa1974.dta", replace

/////////////1973////////////////////////
* Builds "18-25unemployment_usa1973.dta" for survey year 1973: one row per
* country, birth cohort and calendar year lived so far, holding that year's
* unemployment figure (unemp) and the cohort's age in that year (age_year).

clear all
use "unemployment29.dta"

* One copy of each country row per birth cohort: 58 cohorts,
* birth = 1898..1955, which is age 18..75 in 1973.
expand 58
bysort countryname: gen birth = 1897 + _n
tab birth

gen age = 1973 - birth

* Wide (one yr column per calendar year) to long. Variable names are written
* out in full so nothing depends on Stata's variable-abbreviation setting.
reshape long yr, i(countryname age) j(year)
rename yr unemp

* Keep calendar years from the cohort's birth year through the survey year.
gen age_year = year - birth
keep if year <= 1973 & age_year >= 0 & !missing(age_year)

replace countryname = "USA" if countryname == "United States"

* year2 = calendar year of the unemployment figure, year = survey year
rename year year2
gen year = 1973

keep countryname age year year2 unemp age_year
save "18-25unemployment_usa1973.dta", replace

* Stack the per-survey-year cohort files into one long file, starting from
* the 1973 file and appending the remaining survey years in ascending order.
clear all
use "18-25unemployment_usa1973.dta"
append using ///
    "18-25unemployment_usa1974.dta" ///
    "18-25unemployment_usa1976.dta" ///
    "18-25unemployment_usa1977.dta" ///
    "18-25unemployment_usa1980.dta" ///
    "18-25unemployment_usa1982.dta" ///
    "18-25unemployment_usa1984.dta" ///
    "18-25unemployment_usa1985.dta" ///
    "18-25unemployment_usa1987.dta" ///
    "18-25unemployment_usa1988.dta" ///
    "18-25unemployment_usa1989.dta" ///
    "18-25unemployment_usa1990.dta" ///
    "18-25unemployment_usa1991.dta" ///
    "18-25unemployment_usa1993.dta" ///
    "18-25unemployment_usa1994.dta" ///
    "18-25unemployment_usa1996.dta" ///
    "18-25unemployment_usa2000.dta" ///
    "18-25unemployment_usa2004.dta" ///
    "18-25unemployment_usa2006.dta" ///
    "18-25unemployment_usa2008.dta" ///
    "18-25unemployment_usa2010.dta" ///
    "18-25unemployment_usa2012.dta" ///
    "18-25unemployment_usa2014.dta" ///
    "18-25unemployment_usa2016.dta"

* Stored before any age restriction is applied
save "Gss_unemployment18-25.dta", replace

* Distribution of the yearly unemployment figure over all cohort-year rows
summarize unemp, detail

* Restrict to the calendar years in which each cohort was aged 18 to 25
keep if inrange(age_year, 18, 25)

* Average unemployment over those years, one value per (age, survey year) cell
bysort age year: egen w_unemp = mean(unemp)
collapse (mean) w_unemp, by(age year)

* Attach the exposure measure to the respondent-level file.
* NOTE(review): no keep() option is given, so (age, year) cells with no
* matching respondent stay in the data as rows with only w_unemp filled in;
* confirm this is intended.
merge 1:m age year using "Gss_clean.dta", nogenerate
generate ln_income = ln(realrinc)

* Overwrites the respondent-level file in place
save "Gss_clean.dta", replace

* Combine the income file keyed on (age, year, reg16) with the respondent
* file, then add the income file keyed on (year, region).
clear all
use "income_final_1825.dta"

merge 1:m age year reg16 using "Gss_clean.dta", nogenerate
merge m:1 year region using "FinalIncome.dta", nogenerate

* Log of the two per-capita income series taken from the merged data
generate inccurr    = ln(inccap_R)
generate inccurr_us = ln(inccap_US)

* Discard rows dated before 1973 (rows with missing year are not dropped)
drop if year < 1973

* Age bands at interview: 1 = 18-25, 2 = 26-50, 3 = 51-75; anything else
* (including missing age) is left missing.
gen agegroup = cond(inrange(age, 18, 25), 1, ///
               cond(inrange(age, 26, 50), 2, ///
               cond(inrange(age, 51, 75), 3, .)))

* Birth-decade index: 1 = born before 1910, 2 = 1910-1919, ..., 10 = 1990-1999
gen generation = 1 if birth < 1910
forvalues g = 2/10 {
    local lo = 1890 + 10 * `g'
    local hi = `lo' + 10
    replace generation = `g' if birth >= `lo' & birth < `hi'
}

* Indicator per reg16 category (reg161 ... reg1610), each interacted with age
tabulate reg16, generate(reg16)
forvalues k = 1/10 {
    gen reg16_`k' = reg16`k' * age
}

* The first and last interactions are not kept
drop reg16_1 reg16_10

* Indicator per age band, each interacted with income_exp_1825
tabulate agegroup, generate(agegroup)
forvalues g = 1/3 {
    gen agegroup_`g' = agegroup`g' * income_exp_1825
}

* Sample restriction: remove ages above 75 (up to 89), then every row that
* did not fall into one of the three age bands.
drop if age > 75 & age <= 89
drop if agegroup == .

* Coarser birth-year and age categories; each value is the lower bound of
* the interval it falls in, and values outside the listed range become missing.
egen birthcat = cut(birth), ///
    at(1898 1905 1910 1915 1920 1925 1930 1935 1940 1945 1950 1955 1960 1965 1970 1975 1980 1985 1990 1995 2000)
egen agecat = cut(age), ///
    at(18 21 26 31 36 41 46 51 56 61 66 71 76)

* Remove rows coded 0 on reg16
* (presumably respondents living outside the US at age 16 - confirm against the GSS codebook)
drop if reg16 == 0


save "Gss_final.dta", replace

